from Dataloader.weiboloader import WeiboSet
from gensim.corpora.dictionary import Dictionary
from gensim.models.ldamodel import LdaModel
from gensim.test.utils import datapath
from gensim.test.utils import get_tmpfile

# Build the Weibo dataset object, load it from the raw directory and id
# list (no pre-built dataframe, no cached prefix), then hand it a prefix
# to cache under -- presumably Caches_Data writes files there; confirm
# against WeiboSet.
dev = WeiboSet(max_seq_len=10000)
dev.load_data(
    weibo_dir="../data/Weibo",
    weibo_file="../data/weibo_ids.csv",
    weibo_df=None,
    cached_prefix=None,
)
dev.Caches_Data(
    data_prefix="../data/WeiboAll",
)

# Flatten the 'text' entries of every loaded item into one list of
# documents. Each entry is handed to Dictionary as a single document, so
# it is presumably a list of tokens -- TODO confirm against WeiboSet.
all_sents = [sent for ID in dev.data_ID for sent in dev.data[ID]['text']]
dictionary = Dictionary(all_sents)

# Write the vocabulary to the project's output path directly.
# gensim.test.utils.get_tmpfile is a test helper that joins its argument
# onto a temporary directory, so routing the path through it sends the
# file next to a temp folder instead of into ../saved.
# NOTE: ../saved must already exist; save_as_text does not create it.
tmp_fname = "../saved/weibo_dictionary.txt"
dictionary.save_as_text(tmp_fname)

# Convert every document into its bag-of-words representation.
bow = [dictionary.doc2bow(sent) for sent in all_sents]

# Train and save one LDA model per candidate topic count.
for num_topics in [10, 20, 50, 80, 90, 100]:
    # Pass the loop value so each saved file really holds a model with the
    # topic count its name advertises. id2word attaches the vocabulary so
    # the saved model can report topics as words rather than integer ids.
    lda = LdaModel(bow, num_topics=num_topics, id2word=dictionary)
    # Save to the project's output path directly. gensim.test.utils.datapath
    # is a test helper that resolves names inside gensim's own test-data
    # directory, which is not where these models belong.
    # NOTE: ../saved must already exist; LdaModel.save does not create it.
    temp_file = "../saved/weibo_lda_%d.pkl" % num_topics
    lda.save(temp_file)